*************************************************************************************************
/*																								
	Purpose: Cleans raw OCG 1962 data																					*
	Creates: OCG62_IGEanalysis.dta											
*/																								
*************************************************************************************************

* Session setup: start with empty memory and switch off output paging.
clear
set more off

* Relative paths below resolve against the CPS 1962-1973 source folder.
cd "$Mydirectory1/1_DataSources/CPS_1962_1973/"

*------------------------------------------------------------------------------*

* Load the OCG 1962 input file.
use ./output/ocg1962.dta

* Codebook instructions: discard negative weights, then rescale by 1/100.
* (Observations with a missing weight are retained, as they compare above zero.)
keep if weight>=0
replace weight = weight/100

***************
*** RESTRICT SAMPLE
***************

* Foreign-born flag: defined only for birthplace codes below 200;
* codes of 100 and above (within that range) are flagged as foreign-born.
	tab birthplace, m
	gen foreignborn = (birthplace>=100) if birthplace<200
	label var foreignborn "Respondent is foreignborn"
	* Keep the native-born and those whose status could not be determined.
	drop if foreignborn==1

* Age: adopt the shorter variable name
	rename age_ccm age
	label var age "Respondent age"

* Sample window: respondents aged 30 through 50 (inclusive)
	keep if inrange(age,30,50)
	
**********************
*** DEMOGRAPHICS
*********************	

* Sex: constant, since the sample is all male
	gen sex=1

* Quadratic age term
	gen agesq = age^2
	label var agesq "Age squared"

* Geography: only region is available (no state); shift the codes up by one
	rename region region4
	replace region4 = region4+1

* Race: code 2 becomes missing, then 0/1 are shifted to 1 (White) / 2 (Black)
	replace race_ccm=. if race_ccm==2
	rename race_ccm race
	replace race = race+1 if inlist(race,0,1)

	label define race3 1 "White" 2 "Black"
	label values race race3
	label var race "Respondent race"

	* Indicator for Black respondents; missing when race is missing
	gen black = (race==2) if !missing(race)
	label var black "Black"
	tab black, m

* No union variable
		
* No union variable
	
* Marital status: married dummy from marital_ccm (defined only for codes below 2)
	gen married = (marital_ccm==0) if marital_ccm<2
	label var married "Respondent is married"

	* Detailed status dummies from marital_status, one per (name, code) pair.
	* NOTE(review): these are 0 (not missing) when marital_status is missing --
	* confirm whether marital_status can be missing in this file.
	local status_names never_married divorced widowed separated
	local status_codes 6 4 3 5
	forvalues k = 1/4 {
		local nm : word `k' of `status_names'
		local code : word `k' of `status_codes'
		gen `nm' = marital_status==`code'
		tab `nm', m
	}

* Fatherforeign: defined for father birthplace codes below 100; equals 1 when the code is positive
* NOTE(review): the respondent's foreign-born flag uses codes >=100, whereas this uses 1-99 --
* presumably birthplace_father follows a different coding scheme; verify against the codebook.
	tab birthplace_father
	gen fatherforeign = (birthplace_father>0) if birthplace_father<100
	label var fatherforeign "Father is foreign"
	
**********************
*** BIRTHPLACE
*********************
	
* Birthplace: code 0 is treated as missing
	replace birthplace=. if birthplace==0
	
* Born in the south 
* Birthplace codes flagged: 54 VA, 56 NC, 57 SC, 58 GA, 59 FL, 62 TN, 63 AL, 64 MS,
* 71 AR, 72 LA, 73 OK, 74 TX (state names follow the FIPS recode of birthplace in this file).
* The indicator is left missing when birthplace is missing, so that respondents with an
* unknown birthplace are not counted as born outside the South (same convention as the
* other dummies in this file, e.g. black, married, hs_ed).
* NOTE(review): birthplace codes >=200 remain in the sample and are still coded 0 here --
* confirm against the codebook whether those codes mean "not reported".
	gen bornsouth = inlist(birthplace, 54, 56, 57, 58, 59, 62, 63, 64, 71, 72, 73, 74) if birthplace<.
	label var bornsouth "R is born in South"
	
* Bpl variable recoded to FIPS codes using https://www.mcc.co.mercer.pa.us/dps/state_fips_code_listing.htm
* The crosswalk is held as a flat list of "birthplace-code FIPS-code" pairs (one row per
* leading digit of the birthplace code) and applied pair by pair. Any birthplace code
* not listed leaves bpl missing.
* NOTE(review): no birthplace code maps to FIPS 15 (Hawaii) -- confirm whether the
* codebook has a Hawaii code that should be added.
	local bpl_xwalk ///
		11 23  12 33  13 50  14 25  15 44  16 9 ///
		21 36  22 34  23 42 ///
		31 39  32 18  33 17  34 26  35 55 ///
		41 27  42 19  43 29  44 38  45 46  46 31  47 20 ///
		51 10  52 24  53 11  54 51  55 54  56 37  57 45  58 13  59 12 ///
		61 21  62 47  63 1   64 28 ///
		71 5   72 22  73 40  74 48 ///
		81 30  82 16  83 56  84 8   85 35  86 4   87 49  88 32 ///
		91 53  92 41  93 6   94 2

	gen bpl=.
	local n_tokens : word count `bpl_xwalk'
	forvalues i = 1(2)`n_tokens' {
		local j = `i' + 1
		local from : word `i' of `bpl_xwalk'
		local to : word `j' of `bpl_xwalk'
		replace bpl=`to' if birthplace==`from'
	}
	label var bpl "Birthplace, FIPS"
	
* Region childhood: collapse the birth-state FIPS code (bpl) into four regions
	gen region4_born=.
	* 1 = Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont,
	*     New Jersey, New York, Pennsylvania
	replace region4_born=1 if inlist(bpl, 9, 23, 25, 33, 44, 50, 34, 36, 42)
	* 2 = Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota,
	*     Missouri, Nebraska, North Dakota, South Dakota
	replace region4_born=2 if inlist(bpl, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)
	* 3 = South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina,
	*     South Carolina, Virginia, West Virginia, Alabama, Kentucky, Mississippi, Tennessee,
	*     Arkansas, Louisiana, Oklahoma, Texas
	replace region4_born=3 if inlist(bpl, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)
	* 4 = West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming,
	*     California, Oregon, Washington, Alaska, Hawaii
	replace region4_born=4 if inlist(bpl, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)
	label var region4_born "Region R born"

* Moved regions: current region differs from region of birth (missing if birth region unknown)
	gen moved_region = (region4 != region4_born) if !missing(region4_born)
	label var moved_region "Moved Census region"
	
**********************
*** EDUCATION
*********************

* Educational categories (to match ANES), built from schl_ccm
* Note: there is no variable recording whether R completed a given grade.
	gen eduR=.
	replace eduR=0 if schl_ccm==0              // no schooling
	replace eduR=1 if inrange(schl_ccm,1,2)    // some grade school
	replace eduR=2 if schl_ccm==3              // completed 8th grade
	replace eduR=3 if schl_ccm==4              // some HS
	replace eduR=4 if schl_ccm==5              // 4 years of HS
	replace eduR=5 if schl_ccm==6              // 1-3 years of college
	replace eduR=6 if inlist(schl_ccm,7,8)     // 4 or more years of college (like BA)
	label var eduR "Respondent education, consistent bins"

* Years of schooling assigned to each schl_ccm category (ascending)
	gen yrsschool=.
	replace yrsschool = 0 if schl_ccm==0
	replace yrsschool = 3 if schl_ccm==1
	replace yrsschool = 6 if schl_ccm==2
	replace yrsschool = 8 if schl_ccm==3
	replace yrsschool = 10 if schl_ccm==4
	replace yrsschool = 12 if schl_ccm==5
	replace yrsschool = 14 if schl_ccm==6
	replace yrsschool = 16 if inlist(schl_ccm,7,8)
	label var yrsschool "Years of school"

* Binned years of schooling: one value per eduR bin (both grade-school codes share 6)
	gen yrsschool_bin=.
	replace yrsschool_bin = 0 if schl_ccm==0
	replace yrsschool_bin = 6 if schl_ccm>=1 & schl_ccm<=2
	foreach bin of numlist 2/6 {
		replace yrsschool_bin = 2*`bin' + 4 if eduR==`bin'
	}
	label var yrsschool_bin "Years of school, binned"

* Attainment dummies; missing when eduR is missing
	gen hs_ed = (eduR>=4) if !missing(eduR)
	gen coll_ed = (eduR>=6) if !missing(eduR)
	label var hs_ed "HS educated" 
	label var coll_ed "College educated"
	
* Educational categories for father, built from schl_dad_ocg with the same bins as eduR
	gen edu_dad=.
	replace edu_dad=0 if schl_dad_ocg==0 //none
	replace edu_dad=1 if schl_dad_ocg>=1 & schl_dad_ocg<=2 //some grade school
	replace edu_dad=2 if schl_dad_ocg==3 //completed 8th grade
	replace edu_dad=3 if schl_dad_ocg==4 //some HS
	replace edu_dad=4 if schl_dad_ocg==5 //4 years of HS
	replace edu_dad=5 if schl_dad_ocg==6 //some college
	replace edu_dad=6 if schl_dad_ocg==7 | schl_dad_ocg==8 //college
	label var edu_dad "Educational categories for dad" 
	
	* binned: years of school implied by each category (missing when edu_dad is missing)
	gen edu_dad_bin=0 if edu_dad==0 
	replace edu_dad_bin=6 if edu_dad==1 
	replace edu_dad_bin=8 if edu_dad==2 
	replace edu_dad_bin=10 if edu_dad==3 
	replace edu_dad_bin=12 if edu_dad==4 
	replace edu_dad_bin=14 if edu_dad==5 
	replace edu_dad_bin=16 if edu_dad==6 
	tab edu_dad_bin, m
	label var edu_dad_bin "Years of school from bins"
	
	* Attainment dummies for the father. These are left missing when edu_dad is missing
	* (using "if", as for the respondent's hs_ed/coll_ed); combining the missing check
	* with "&" would instead code fathers with unknown education as 0.
	gen dad_hs_ed = edu_dad>=4 if edu_dad<.
	gen dad_coll_ed = edu_dad>=6 if edu_dad<.
	label var dad_hs_ed "Dad HS educated" 
	label var dad_coll_ed "Dad College educated"
	
	*Note: Can't create yrsschool_dad bc of number of available edu categories.
	
* Employment variable: 1 for employment codes 4 or 0 (employed or in armed forces), 0 otherwise
* NOTE(review): a missing employment code yields 0 rather than missing -- confirm this is intended.
	tab employment, m 
	gen employed = inlist(employment,0,4)
	label var employed "Respondent is employed"

************
* Siblings *
************

*# Brothers--already coded up in 1_ocg1962_clean.do. Name of variable: R_num_brothers
*# Sisters--already coded up in 1_ocg1962_clean.do. Name of variable: R_num_sisters

* Flag indeterminate (high) number of sisters: a value of 9 stands for "9 or more".
* Note: Not an issue for R_num_brothers variable.
	gen flag_9plussisters = R_num_sisters==9 if !missing(R_num_sisters)
	label var flag_9plussisters "Flag =1 if R has an indeterminate (9+) # of sisters"
	tab flag_9plussisters, m

*# of siblings: sum of sisters and brothers; when only one count is reported, use that count alone
	gen R_num_siblings = R_num_brothers + R_num_sisters if (R_num_brothers!=. & R_num_sisters!=.)
	replace R_num_siblings = R_num_brothers if (R_num_brothers!=. & R_num_sisters==.)
	replace R_num_siblings = R_num_sisters if (R_num_brothers==. & R_num_sisters!=.)
	label var R_num_siblings "# of R's siblings"
	tab R_num_siblings, m

*****************
* Own fertility *
*****************

* # (living) boys--not available
* Flag indeterminate (high) # of boys--not available
* # (living) girls--not available
* Flag indeterminate (high) # of girls--not available
* # of living kids--not available	
* # of deceased kids--not available
	
* # of kids ever--coded up in 1_ocg1962_clean.do. Name of variable: R_numkids_ever. 
/*Note: The codebook phrases this variable as "number of children ever born to wife". 
        The only other related variable is "# of own kids under age of 5". */
	label var R_numkids_ever "# of kids that R has ever had (living or deceased)"
	tab R_numkids_ever, m

* Dummy: Has R ever had kids? (any nonzero count; missing when the count is missing)
	gen R_kids_ever = !(R_numkids_ever==0) if !missing(R_numkids_ever)
	label var R_kids_ever "Dummy=1 if R has ever had kids"
	tab R_kids_ever,m 

*Dummy: Does R have kids right now?
//Note: Unable to code up because # of living kids is unavailable.

*********************************
* # of persons in R's household *
*********************************

* # of kids (of any age) living in R's hh--not available
* # of children 0-17 living in R's hh--not available
* total # of people living in R's household--not available

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***************
*** OCCUPATION OF FATHER AND SON
***************

* Crosswalk the respondent's occupation (occupationR) and then the father's occupation
* (dad_occ1960) from 1960 Census codes to the coarsened ANES codes. On each pass the
* occupation variable is temporarily renamed census1960 so it can serve as the merge key,
* and is renamed back at the end of the pass. The merge adds fatheroccej on both passes:
* on the occupationR pass it is renamed occRej, on the dad_occ1960 pass it is kept as
* fatheroccej and used to build the mother-occupation and head-of-household variables.
foreach var of varlist occupationR dad_occ1960 {

	* Use the occupation variable as the merge key; code 1000 is treated as missing
	rename `var' census1960 
	replace census1960=. if census1960==1000

* Crosswalk here the 1960 occupations to the ANES occs (merging adds one variable: fatheroccej)
	merge m:1 census1960 using ../Crosswalks/Crosswalk_1960Census_toANES.dta
	* Every unmatched observation from the data in memory must have a missing occupation code
	assert census1960==. if _merge==1 
	* Discard crosswalk rows that matched no observation
	drop if _merge==2
	drop _merge

* Recode self-employed managers: category 28 becomes 21 when class of worker equals 2.
* The father's class of worker is used on the dad_occ1960 pass, the son's on the occupationR pass.
	if "`var'"=="dad_occ1960" {
	replace fatheroccej=21 if fatheroccej==28 & dad_clsswrk==2
	
	}
	if "`var'"=="occupationR" {
	replace fatheroccej=21 if fatheroccej==28 & son_clsswrk==2
	}

/*	Fix father occupation (fatheroccej--for fathers only), 
    Code mother occupation,
    Dummies for head of household when R was growing up.
*/
	if "`var'"=="dad_occ1960" {
	
	* Keep an untouched copy of the merged occupation before it is split into father/mother
	clonevar fatheroccej_nochange = fatheroccej
	label var fatheroccej_nochange "father or head of hh occupation--unchanged since merge with ANES occs"
	
	//Fix fatheroccej: blank it out when R_wholivedw_age16 is 2 or 4 
	//(these codes become headofhh_mother and headofhh_otherfemale below)
	replace fatheroccej =. if inlist(R_wholivedw_age16,2,4)
	tab fatheroccej,m 
	assert fatheroccej==. if fatheroccej_nochange==. | inlist(R_wholivedw_age16,2,4) 

	//Mother occupation: the reported occupation, taken only when R_wholivedw_age16 is 2 or 4
	gen motheroccej = fatheroccej_nochange if (R_wholivedw_age16==2 | R_wholivedw_age16==4)
	tab motheroccej,m 
	assert motheroccej==. if fatheroccej_nochange==. | inlist(R_wholivedw_age16,0,1,3,.) 
	
	//Dummies for head of household when R was growing up
	//tab, gen() numbers the dummies from 1; the loop below shifts the suffixes down by one.
	//NOTE(review): assumes R_wholivedw_age16 takes exactly the five values 0-4, so that the
	//shifted suffix equals the category code -- confirm with the tab output.
	tab R_wholivedw_age16, gen(headofhh_)

	foreach num of numlist 1/5 {
	
	local n = `num'-1
	di "`n'"
	ren headofhh_`num' headofhh_`n'
	
	}
	
	tab headofhh_0, m 
	tab headofhh_1, m
	replace headofhh_1 =1 if headofhh_0 ==1 //Note: code father as head of hh for Rs who lived with both parents when growing up.
	tab headofhh_1,m 
	drop headofhh_0
	
	ren headofhh_1 headofhh_father
	ren headofhh_2 headofhh_mother
	ren headofhh_3 headofhh_othermale
	ren headofhh_4 headofhh_otherfemale
	
	* Each dummy must be missing whenever the source variable is missing
	foreach name in father mother othermale otherfemale {
		assert headofhh_`name' ==. if R_wholivedw_age16==.
	}
	
	label var headofhh_father "Head of hh when R was growing up was R's father"
	label var headofhh_othermale "Head of hh when R was growing up was some other male (not R's father)"
	label var headofhh_otherfemale "Head of hh when R was growing up was some other female (not R's mother)"
	label var headofhh_mother "Head of hh when R was growing up was R's mother"

	//Alternate dummy for father as head of hh during R's childhood.  
	/*Note: When R reports occupation of a parent but does not report who R lived with at age 16, 
	        will assume that R lived with father.*/
	gen headofhh_father_imputed = headofhh_father
	replace headofhh_father_imputed =1 if fatheroccej_nochange!=. & R_wholivedw_age16==.
	label var headofhh_father_imputed "Impute dad when parent occ != missing & no info about hh head at age 16"
	}

	* Restore the occupation variable's original name
	rename census1960 `var'
	
	* On the respondent pass, rename the crosswalked variable to occRej so that the
	* father pass can add fatheroccej again (the sample is all male, so R is the son)
	if "`var'"=="occupationR" {
	
		rename fatheroccej occRej
		label var occRej "Respondent occupation, coarsened"
	}
		
	}
	
	label var fatheroccej "Father occupation, coarsened"

*****************************************************************************************************
* DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING) *
*****************************************************************************************************
	/*Note: Each R reports either mom's occupation or dad's occupation, never both.
	As in the coding for avtmh76, father_notworking stays "." for every R who only reports mom occ
	or whose reported dad occ is missing. mother_notworking follows the same logic.*/
	
	* 0 when a father occupation is recorded (R_wholivedw_age16 in 0, 1, 3 or missing); otherwise missing
	gen father_notworking = 0 if fatheroccej!=. & inlist(R_wholivedw_age16,0,1,3,.)
	tab father_notworking,m 
	
	* 0 when a mother occupation is recorded (R_wholivedw_age16 in 2 or 4); otherwise missing
	gen mother_notworking = 0 if motheroccej!=. & inlist(R_wholivedw_age16,2,4)
	tab mother_notworking,m 
	
* Variable for father being either farm laborer or operator (occupation codes 71 and 81);
* missing when the father's occupation is missing
	gen fatherfarm = inlist(fatheroccej,71,81) if fatheroccej!=.
	label var fatherfarm "Father in farming occupation"

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
***************
*** FAMILY INCOME 
***************

	* Family income: assign each income_total_ccm bracket its dollar midpoint.
	* The open-ended brackets use 0.75 x the lowest bound and 1.25 x the highest bound.
	local bottom_value = 0.75*2000
	local top_value = 15000*1.25

	gen fam_inc =.
	replace fam_inc=`bottom_value' if inrange(income_total_ccm,0,4) //0-<2k
	replace fam_inc=2500 if inlist(income_total_ccm,5,6) //2-3
	replace fam_inc=3500 if inlist(income_total_ccm,7,8) //3-4
	replace fam_inc=4500 if inlist(income_total_ccm,9,10) //4-5
	replace fam_inc=5500 if income_total_ccm==11 //5-6
	replace fam_inc=6500 if income_total_ccm==12 //6-7
	replace fam_inc=7500 if income_total_ccm==13 //7-8 
	replace fam_inc=9000 if income_total_ccm==14 //8-10 
	replace fam_inc=12500 if income_total_ccm==15 //10-15
	replace fam_inc=`top_value' if inlist(income_total_ccm,16,17) //15+
	label var fam_inc "Family income, midpoints"
	
	* Flags for observations sitting in the bottom / top bracket (missing if income is missing)
	gen bottomcoded_son = (fam_inc==`bottom_value') if !missing(fam_inc)
	gen topcoded_son = (fam_inc==`top_value') if !missing(fam_inc)
	label var bottomcoded_son "Son income, bottom coded"
	label var topcoded_son "Son income, top coded"
	
	/* Deflate fam_inc to 1950 dollars with the CPI: https://data.bls.gov/timeseries/CUUR0000SA0 */
	gen CPI1950 = 24.1
	gen CPI1961 = 29.9 //1961 is used because ocg is part of March CPS supplement.

	gen fam_inc_real = fam_inc * (CPI1950/CPI1961) 
	label var fam_inc_real "Family income, in 1950 dollars"
	
	* Natural log of real family income
	gen lnfaminc= ln(fam_inc_real) 
	label var lnfaminc "Logged total family income, real"
	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***************
*** BIRTH COHORTS 
***************

	* Survey year is constant for this file
	gen year = 1962 
	label var year "Year of survey"

	* Year of birth: one extra year is subtracted because this is a March supplement
	gen dob=year-age-1
	label var dob "Year of birth"
	
* Assign everyone the decade in which they were born (1910s, 1920s or 1930s).
	tab dob, m
	gen decade=.
	foreach d of numlist 1910(10)1930 {
		replace decade=`d' if dob>=`d' & dob<=`d'+9
	}
	label var decade "Decade of birth"
	
* One dummy per decade of birth
	tab decade, gen(decade_)

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***************
*** INTERACTION TERMS
***************

	* Variables that will enter the interaction terms
	global institution_list "black hs_ed coll_ed"

	* For each one, create <name>_dm = value minus the (unweighted) sample mean,
	* carrying the original variable label over with a ", demeaned" suffix
	foreach v of global institution_list {
		summarize `v'
		gen `v'_dm = `v'- `r(mean)'
		local lbl : variable label `v'
		label variable `v'_dm "`lbl', demeaned"
	}

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***************
*** SAVE
***************
	
	* Give the identifier and the weight survey-specific names
	ren id id_ocg62
	ren weight weight_ocg62
	label var id_ocg62 "ID number (unique identifier)"

	* Informational check on identifier uniqueness (no duplicates reported)
	duplicates report id_ocg62

	* Shrink storage types, put id and weight first, sort by id, and write the analysis file
	compress
	order id_ocg62 weight_ocg62
	sort id_ocg62
	save ./output/OCG62_IGEanalysis.dta, replace
